{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "csv文件数据的规模，行数：297882, 列数：13\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "# Load the drive records from CSV and report the table dimensions.\n",
    "data = pd.read_csv(\"ST12000NM0007.csv\")\n",
    "n_rows, n_cols = data.shape\n",
    "print('csv文件数据的规模，行数：%d, 列数：%d' % (n_rows, n_cols))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>model</th>\n",
       "      <th>failure</th>\n",
       "      <th>smart_5_raw</th>\n",
       "      <th>smart_9_raw</th>\n",
       "      <th>smart_187_raw</th>\n",
       "      <th>smart_188_raw</th>\n",
       "      <th>smart_193_raw</th>\n",
       "      <th>smart_194_raw</th>\n",
       "      <th>smart_197_raw</th>\n",
       "      <th>smart_198_raw</th>\n",
       "      <th>smart_241_raw</th>\n",
       "      <th>smart_242_raw</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2019-01-03</td>\n",
       "      <td>ST12000NM0007</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8567.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>413.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.736282e+10</td>\n",
       "      <td>7.763323e+10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2019-01-03</td>\n",
       "      <td>ST12000NM0007</td>\n",
       "      <td>1</td>\n",
       "      <td>28016.0</td>\n",
       "      <td>10693.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>671.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.648538e+10</td>\n",
       "      <td>9.411143e+10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2019-01-03</td>\n",
       "      <td>ST12000NM0007</td>\n",
       "      <td>1</td>\n",
       "      <td>5220.0</td>\n",
       "      <td>8568.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.295033e+09</td>\n",
       "      <td>475.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.736176e+10</td>\n",
       "      <td>7.851910e+10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2019-01-03</td>\n",
       "      <td>ST12000NM0007</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10470.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1364.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.361354e+10</td>\n",
       "      <td>8.780835e+10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2019-01-03</td>\n",
       "      <td>ST12000NM0007</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5738.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>367.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.135032e+10</td>\n",
       "      <td>5.604366e+10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date          model  failure  smart_5_raw  smart_9_raw  \\\n",
       "0  2019-01-03  ST12000NM0007        1          0.0       8567.0   \n",
       "1  2019-01-03  ST12000NM0007        1      28016.0      10693.0   \n",
       "2  2019-01-03  ST12000NM0007        1       5220.0       8568.0   \n",
       "3  2019-01-03  ST12000NM0007        0          0.0      10470.0   \n",
       "4  2019-01-03  ST12000NM0007        0          0.0       5738.0   \n",
       "\n",
       "   smart_187_raw  smart_188_raw  smart_193_raw  smart_194_raw  smart_197_raw  \\\n",
       "0            0.0   0.000000e+00          413.0           21.0            0.0   \n",
       "1           22.0   0.000000e+00          671.0           32.0            0.0   \n",
       "2            1.0   4.295033e+09          475.0           25.0            0.0   \n",
       "3            0.0   0.000000e+00         1364.0           31.0            0.0   \n",
       "4            0.0   0.000000e+00          367.0           29.0            0.0   \n",
       "\n",
       "   smart_198_raw  smart_241_raw  smart_242_raw  \n",
       "0            0.0   4.736282e+10   7.763323e+10  \n",
       "1            0.0   5.648538e+10   9.411143e+10  \n",
       "2            0.0   4.736176e+10   7.851910e+10  \n",
       "3            0.0   5.361354e+10   8.780835e+10  \n",
       "4            0.0   4.135032e+10   5.604366e+10  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows to sanity-check the column names and value ranges.\n",
    "data.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "valid hdds: 296400\n",
      "failed hdds: 1482\n"
     ]
    }
   ],
   "source": [
    "# Partition the rows by the `failure` label (0 -> valid, 1 -> failed)\n",
    "# and report how many rows fall into each group.\n",
    "failure_flag = data['failure']\n",
    "valid = data[failure_flag == 0]\n",
    "failed = data[failure_flag == 1]\n",
    "print(\"valid hdds:\", valid.shape[0])\n",
    "print(\"failed hdds:\", failed.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
    "\n",
    "# Target vector: the failure label.\n",
    "Y_data = data['failure']\n",
    "\n",
    "# Feature table: everything except the label and the two non-numeric columns.\n",
    "feature_frame = data.drop(columns=['failure', 'date', 'model'])\n",
    "\n",
    "# Standardise every feature column; MinMaxScaler stays imported as the\n",
    "# alternative scaler that can be swapped in here.\n",
    "# NOTE(review): the scaler is fit on the full dataset - if a train/test split\n",
    "# happens downstream, confirm that this is intended.\n",
    "scaler = StandardScaler()\n",
    "X_data = scaler.fit_transform(feature_frame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "array([0.26813012, 0.46814007, 0.57866606, 0.68222958, 0.78113628,\n",
      "       0.8780748 , 0.96698071, 0.99334292, 1.        , 1.        ])\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x25019e28e20>]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmwAAAFKCAYAAACgiAOTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAABJ0AAASdAHeZh94AAAv70lEQVR4nO3deVzVVf7H8TegAuqIGyjm+tPIEUS0UWwZl2baLNNUNLXMsiwXTMt0zNyXFswts7TcrSyXFnFmSnOyNJdMnEnTkBJFJXFfAQXu74/T5YagwhX4Xu59PR8PHt3vufcrHx7MPHx7zvecj5fNZrMJAAAALsvb6gIAAABwbQQ2AAAAF0dgAwAAcHEENgAAABdHYAMAAHBxBDYAAAAXR2ADAABwcQQ2AAAAF0dgAwAAcHGlrC6gMJw+fVobNmxQrVq15Ovra3U5AAAAV5Wenq6kpCS1bt1aFStWzNc9bhHYNmzYoI4dO1pdBgAAQL59+umn6tChQ74+6xaBrVatWpLMD96gQQOLqwEAALi6hIQEdezYMTu/5IdbBDb7MmiDBg0UGhpqcTUAAADXV5DHuNh0AAAA4OIIbAAAAC6OwAYAAODiCGwAAAAujsAGAADg4pwObOfPn9eYMWN03333qXLlyvLy8tLChQvzff/p06fVt29fBQYGqly5cmrbtq127NjhbDkAAABuy+nAdvz4cY0fP1579uxRkyZNCnRvVlaWHnjgAX3wwQcaOHCgXn/9daWkpKhNmzbat2+fsyUBAAC4JafPYQsODlZycrKqV6+u7du3q3nz5vm+d8WKFfruu++0fPlydenSRZLUtWtXhYSEaMyYMfrggw+cLQsAAMDtOB3YfH19Vb16dafuXbFihapVq6ZOnTpljwUGBqpr165aunSp0tPT6QkKAEBRs9mk1FTp4kXpwoWc/73aWFqa1VUXPX9/aeRIq6vIwZJOB3FxcWrWrJm8vXOuyLZo0UJz585VfHy8GjdunOe9KSkpOnbsWI6xhISEIqsVAADLXLqUvxB15Vh+P3/xotU/oWuqVInAJknJyclq1apVrvHg4GBJ0pEjR64a2GbPnq1x48YVaX0AABRYerqUkCAdO+ZceMrrvYwMq3+q3Ly9JS8vq6soWj4+VleQiyWBLTU1Nc8lTz8/v+z3r6Z///6KiorKMWZvogoAQJE7f17au1fas0f66Sfz3z17pF9+kTIzra3Ny0sqW1YqVy7nfwtrrGxZqXRpa39GD2VJYPP391d6enqu8bTf18X9/f2vem9QUJCCgoKKrDYAACRJx487wtgfw1lSkvN/pp/fjYWl693j6+v+s18eypLAZt9heiX7WI0aNYq7JACAJ7LZpMOHc86U2cPZ8ePXv9/HR2rQQPrznx1fNWua8HRlqPL3d8mlNpQMlgS2iIgIffvtt8rKysqx8WDr1q0qW7asQkJCrCgLAOCuMjKk/ftzL2Pu3SudO3f9+/38pFtukRo1yhnObr5ZKlOm6OuHxyvywJacnKwzZ86ofv36Kv37uneXLl20YsUKrVq1KvsctuPHj2v58uVq3749R3oAAJyTlibFx+dexoyPNzsurycgIGcgswe0OnWYHYOlbiiwzZo1S6dPn9aRI0ckSatXr9ahQ4ckSdHR0QoICNCIESO0aNEi7d+/X3Xr1pVkAlvLli31xBNP6KefflLVqlU1e/ZsZWZmsgMUAHB9Z8/mXMK0h7P9+6WsrOvfX61azkBm/woO5hkwuKQbCmxTpkzRgQMHsq9XrVqlVatWSZIeffRRBQQE5Hmfj4+P/vnPf+rFF1/UzJkzlZqaqubNm2vhwoW65ZZbbqQkAIC7sNnMERlXLmPu2WOeO8uPunXznjGrVKlISwcKm5fNZrNZXcSN2r17t8LCwrRr1y6FhoZaXQ4AoCCysszOy7x2ZJ48ef37S5VyPPj/xxmzW24xD/sDLsaZ3GLJpgMAgIdKSJB+/DH3cmZ+
Ttz395caNsw9Y1a/Pg/+w+0R2AAARW/HDmnYMOmrr67/2YoVcy9h2h/8v6KlIeApCGwAgKKzf7/pyfjhh7nfq14990P/jRqZDQE8+A/kQGADABS+48eliROl2bOly5fNWKlS0rPPSj16mHBWsaKlJQIlCYENAFB4Ll6Upk+XXnvNHL1h17WrNGmS2RwAoMAIbACAG5eRIS1aJI0eLf1+NqckqU0b6fXXpebNLSsNcAcENgCA82w2KTZW+sc/zFEcdmFhZpbt/vt5Hg0oBAQ2AIBztmwxOz+//dYxVrOmNGGC9NhjtHICChGBDQBQMPHx0ksvSStXOsYCAsxYdLQ5Lw1AoSKwAQDy5+hRadw4ae5cKTPTjJUpY0LaSy9JlStbWx/gxghsAIBrO3dOeuMNacoU6cIFM+blJT36qFn+rFPH2voAD0BgAwDk7fJl6d13zaxaSopj/N57zYaCJk2sqw3wMAQ2AEBONpt5Pu2ll6R9+xzjzZqZIzr+9jfragM8FIENAODwzTdm5+fWrY6xevXMobfdutHLE7AIgQ0AIO3ebc5Si411jFWpIo0aZdpJ+fpaVxsAAhsAeLRDh6QxY6SFC6WsLDPm7y8NGWJm2gICLC0PgEFgAwBPdOaM9Oqrpu9nWpoZ8/aWnnxSGjtWuukmK6sDcAUCGwB4kvR0afZsaeJE6eRJx/hDD0mvvCI1amRdbQCuisAGAJ4gK0v68EPp5ZelxETHeMuWZufnX/9qWWkAro/ABgDubt068zxaXJxjLCTEzKg9/DDN2YESgP3ZAOCudu40h9zefbcjrFWrJr39trRrl9SpE2ENKCGYYQMAd5OYaI7jWLrUMVa+vPTii9Lzz5vXAEoUAhsAuIsTJ6TJk6VZs6RLl8xYqVLSM8+YAFetmrX1AXAagQ0ASrrUVGnmTPNM2pkzjvEuXUyAu/lm62oDUCgIbABQUmVmSosXS6NHmwNw7Vq1Mjs/IyOtqw1AoSKwAUBJY7NJ//ynaSW1a5djPDRUeu01qV07NhMAbobABgAlybZt5oiODRscYzfdJI0fLz3+uOTjY11tAIoMgQ0ASoKEBOmll6Tlyx1jAQFmlu2550z/TwBui8AGAK4sJcXMns2ZI2VkmLEyZaQBA6SRI6UqVaytD0CxILABgCs6f16aOlWKiTGv7Xr2NH1A69a1rDQAxY/ABgCu5PJlad48aexY6ehRx/jdd5sNBU2bWlYaAOsQ2ADAFdhs0uefS8OHSz//7Bhv2tQEtbvvtq42AJYjsAGA1X7+2Wwc+OILx1jdumbps3t3yZu2z4CnI7ABgFXOnTOhbNo0sxQqSZUqmTZS/ftLvr7W1gfAZRDYAKC42WzShx9KQ4dKyclmzMvL9PycOJGdnwByIbABQHH63/+kgQOlb791jN12m2nY3qyZdXUBcGk8GAEAxeHUKSk62mwisIe1atWkRYukjRsJawCuiRk2AChKWVnS/PnSiBHS8eNmzMfHbDIYPdp0KwCA6yCwAUBR2bbNLH9+/71j7K67pJkzTaN2AMgnlkQBoLClpEh9+kiRkY6wVquW6QO6bh1hDUCBEdgAoLBkZJjZs5AQswwqmb6fI0dKe/ZIXbqY3aAAUEAsiQJAYfj6a7OpYNcux9iDD5oz1ho0sKwsAO6BGTYAuBGHDpluBG3bOsJa/fpSbKy0ejVhDUChILABgDPS06VXX5UaNpSWLTNjZctKkyaZ4PbAA9bWB8CtsCQKAAX1r3+ZYzn27XOMde0qTZliNhcAQCEjsAFAfv36qzRkiPT5546x0FDpzTfNkigAFBGWRAHgei5eNIfcNmrkCGsVKpgNBXFxhDUARY4ZNgC4GptNWrVKev556eBBx/gTT0ivvGJaSwFAMSCwAUBe9uyRBg0yB93a3XqradLesqV1dQHwSCyJAsAfnT0rDR0qhYc7wlqVKtKcOdLWrYQ1AJZghg0AJLP8uXSpNGyY9NtvZszbW3r2WWnCBKlyZWvrA+DR
CGwAEBdnuhRs2uQYu+MOs/wZEWFZWQBgx5IoAM918qTUv7/0l784wlr16tKSJdK33xLWALgMZtgAeJ7MTOm990xT9hMnzFipUtLgwdKoUebIDgBwIU7PsKWnp2v48OGqUaOG/P39FRkZqbVr1+br3nXr1qlt27aqWrWqKlasqBYtWmjJkiXOlgIA+bd5s9SihXk2zR7W7r5b+vFHKSaGsAbAJTkd2Hr37q2pU6eqZ8+emjFjhnx8fNSuXTtt3Ljxmvd9/vnnuueee3Tp0iWNHTtWkyZNkr+/v3r16qVp06Y5Ww4AXNtvv0m9e0u33y7t2GHG6tQx56x98YXpCQoALsrLZrPZCnrTtm3bFBkZqZiYGA0dOlSSlJaWprCwMAUFBem777676r333HOPdu/erV9//VW+vr6SpIyMDDVs2FDlypXTf//73wL/ELt371ZYWJh27dql0NDQAt8PwI1dvmw2D4wda47skCRfX2n4cPNVtqyl5QHwPM7kFqdm2FasWCEfHx/17ds3e8zPz099+vTR5s2blZSUdNV7z549q0qVKmWHNUkqVaqUqlatKn9/f2fKAYC8ffWV2Tjw/POOsNahg/TTT9K4cYQ1ACWGU4EtLi5OISEhqnDFsx4tWrSQJO3cufOq97Zp00a7d+/WqFGjlJCQoF9++UUTJkzQ9u3bNWzYMGfKAYCcDh6UunaV/v53E84k6eabpX/9S/r0U+n//s/S8gCgoJzaJZqcnKzg4OBc4/axI0eOXPXeUaNGaf/+/Zo0aZImTpwoSSpbtqxWrlypDh06XPd7p6Sk6NixYznGEhISClI+AHeVlia98YY0aZKUmmrGypUzOz8HDzZLoQBQAjkV2FJTU3Msadr5+fllv381vr6+CgkJUZcuXdSpUydlZmZq7ty5evTRR7V27Vq1vE7bl9mzZ2vcuHHOlA3AncXGSs89J/36q2Ose3ez8/Omm6yrCwAKgVOBzd/fX+np6bnG09LSst+/moEDB2rLli3asWOHvL3NimzXrl0VGhqq5557Tlu3br3m9+7fv7+ioqJyjCUkJKhjx44F/CkAuIWEBDN7tmaNY6xxY+nNN6XWrS0rCwAKk1OBLTg4WIcPH841npycLEmqUaNGnvddunRJ8+bN07Bhw7LDmiSVLl1a999/v2bNmqVLly6pTJkyV/3eQUFBCgoKcqZsAO7kwgVp8mRpyhTp0iUzFhBg+n7262cOwgUAN+HUpoOIiAjFx8frrH3X1e/ss2MRV2nncuLECWVkZCgzMzPXe5cvX1ZWVlae7wFANptN+vhjc27a5MkmrHl5SX36SPHxpicoYQ2Am3EqsHXp0iX72TO79PR0LViwQJGRkapVq5Yk6eDBg9q7d2/2Z4KCglSxYkV98sknumT/F7Gk8+fPa/Xq1WrYsCFHewC4ut27pb/9TerWTTp0yIw1by5t2WJaTTH7DsBNOfXP0MjISEVFRWnEiBFKSUlRgwYNtGjRIiUmJmrevHnZn+vVq5c2bNgg+9m8Pj4+Gjp0qF5++WW1bNlSvXr1UmZmpubNm6dDhw5p6dKlhfNTAXAv586Zg29nzDB9QCWpalXp1VelJ56QvJ1u2gIAJYLT6waLFy/WqFGjtGTJEp06dUrh4eGKjY1Vq1atrnnfyJEjVa9ePc2YMUPjxo1Tenq6wsPDtWLFCnXu3NnZcgC4I5tN+ugj6YUXJPtxQd7e0oAB5uDbSpWsrQ8AiolTralcDa2pADe0d68JZuvXO8buuEOaPVsKD7euLgC4QcXWmgoAisyFC9KIESaU2cNaYKC0cKH07beENQAeia1UAFyDzSZ98ok5U83ej9jb2xzRMWECy58APBqBDYD19u0zx3F88YVjLDLSLH82a2ZdXQDgIlgSBWCd1FRp9GgpLMwR1ipXlt59V/ruO8IaAPyOGTYA1li9Who0SEpMNNdeXtJTT0mvvCJVqWJpaQDgaghsAIrX/v2mSfvq1Y6xZs3M8mdkpHV1
AYALY0kUQPFISzObBxo1coS1ihWlt96Stm0jrAHANTDDBqDoffGFNHCglJDgGOvdW3rtNdpJAUA+ENgAFJ2kJHNMx6pVjrHwcDOrduedlpUFACUNS6IACt+lS2b2rGFDR1j705+k6dOlH34grAFAATHDBqBwrV9vWkrt3esY69FDmjJFCg62ri4AKMEIbAAKx5Ejpkn7smWOsUaNzPJnmzaWlQUA7oAlUQA35vJlaepU6ZZbHGGtXDkpJkbauZOwBgCFgBk2AM775huz/Llrl2MsKsoEuJo1rasLANwMM2wACu7oUalXL6l1a0dYCwkxx3d8/DFhDQAKGYENQP5lZEizZpnlzyVLzJi/vzRpkvS//0n33GNtfQDgplgSBZA/mzdL/fub59LsOnaUpk2T6ta1qCgA8AzMsAG4tuPHpT59pNtvd4S1//s/KTZW+uQTwhoAFAMCG4C8ZWZKc+aYZ9Pmzzdjvr7S2LHmubUHHrC0PADwJCyJAsht+3az/Pn9946xdu2kmTOl+vWtqwsAPBQzbAAcTp6U+vWTWrRwhLU6daRPPzVLoIQ1ALAEM2wApKwsadEiadgw88yaJJUuLb34ojRypFS2rLX1AYCHI7ABnm7nTnP47XffOcbuvtsc3xESYllZAAAHlkQBT3XmjDRokHTrrY6wdtNN5uDbL74grAGAC2GGDfA0Npv0/vvS0KGmY4EklSolDRkijR4tlS9vbX0AgFwIbIAn2bXLLH9+841jrHVr6a23pNBQ6+oCAFwTS6KAJzh3zsyoRUQ4wlr16mam7T//IawBgItjhg1wZzabeSbt+eelI0fMmI+PFB1tDsANCLC0PABA/hDYAHe1d68JZuvWOcbuuMMsfzZpYl1dAIACY0kUcDcXLkgjRkjh4Y6wFhgoLVhglkMJawBQ4jDDBrgLm810JBg8WDp40Ix5eZnOBRMnSpUqWVkdAOAGENgAd3DwoOn9uWaNY6xFC2n2bHPOGgCgRGNJFCjJMjOl6dOlRo0cYa1yZWnuXGnzZsIaALgJZtiAkiouTurbV9q+3THWu7cUEyNVrWpZWQCAwscMG1DSXLxomrQ3b+4Iaw0aSF99ZTYWENYAwO0wwwaUJF98YTYR7N9vrkuVMuHt5Zclf39rawMAFBkCG1ASpKSYw2/ff98x1rKleVatcWPr6gIAFAuWRAFXZrNJCxdKf/6zI6z96U/SrFnSxo2ENQDwEMywAa5q3z7pmWdMr0+7jh2lN9+Uata0rCwAQPFjhg1wNZcuSZMmmdkze1irUUNatUr65BPCGgB4IGbYAFeyebM5qmPXLnNt71QweTKN2gHAgxHYAFdw5oz00kvS22+b59YkKTRUevdd6bbbrK0NAGA5lkQBq336qelUMHu2CWu+vqb3544dhDUAgCRm2ADrHD4sRUeb59Ls2raV3nlHCgmxri4AgMthhg0obpmZ0ltvmaM67GGtUiVp/nzTrYCwBgC4AjNsQHH68UezqWDLFsdYjx7StGlSUJB1dQEAXBozbEBxSE2VRo6UmjVzhLW6daV//9sciEtYAwBcAzNsQFFbv94cgJuQYK59fEybqTFjpHLlrK0NAFAiENiAonLihDR0qGktZXfrreaojqZNLSsLAFDysCQKFDabzSxzNmzoCGvlypnn1LZsIawBAAqMGTagMP36q+lM8OWXjrF27cwZa3XqWFcXAKBEY4YNKAwZGVJMjBQW5ghr1apJH30kxcYS1gAAN4QZNuBGff+9Oapj507H2NNPS6+9Zs5XAwDgBjHDBjjr/HlpyBCpZUtHWLvlFmnDBmnuXMIaAKDQMMMGOGPNGvOsWlKSuS5d2jRvHzHC9AIFAKAQOT3Dlp6eruHDh6tGjRry9/dXZGSk1q5dm+/7P/roI912220qV66cKlasqNtvv13r1693thygePz2m9Stm/Tgg46wduedZoZt7FjCGgCgSDgd2Hr37q2pU6eqZ8+emjFjhnx8fNSuXTtt3Ljx
uveOHTtW3bt3V61atTR16lRNnDhR4eHhOnz4sLPlAEUrK8ucn9awofTxx2YsIECaM8csgTZqZG19AAC35tSS6LZt27Rs2TLFxMRo6NChkqRevXopLCxMw4YN03fffXfVe7ds2aLx48frjTfe0JAhQ5yrGihOe/aYTgXffusYi4qSZsyQgoOtqwsA4DGcmmFbsWKFfHx81Ldv3+wxPz8/9enTR5s3b1aSfakoD9OnT1f16tX13HPPyWaz6fz5886UABS99HSzzBkR4QhrtWpJn39uZtkIawCAYuJUYIuLi1NISIgqVKiQY7xFixaSpJ1/PN7gCl999ZWaN2+umTNnKjAwUH/6058UHBysWbNm5et7p6SkaPfu3Tm+Euw9GoHC8u23JqiNGydduiR5eUnPPSft3i21b291dQAAD+PUkmhycrKC85hdsI8dOXIkz/tOnTql48ePa9OmTVq/fr3GjBmj2rVra8GCBYqOjlbp0qX1zDPPXPN7z549W+PGjXOmbOD6Tp2Shg83z6vZNWlirps3t64uAIBHcyqwpaamyjeP3XB+fn7Z7+fFvvx54sQJLVu2TN26dZMkdenSRY0bN9bEiROvG9j69++vqKioHGMJCQnq2LFjQX8MwMFmk5YvlwYNko4eNWP+/mZJdMgQc2wHAAAWcSqw+fv7Kz09Pdd4Wlpa9vtXu0+SSpcurS5dumSPe3t7q1u3bhozZowOHjyo2rVrX/V7BwUFKSgoyJmygbwdPCj172/OVrO7+27p7bel+vWtqwsAgN859QxbcHCwkpOTc43bx2rUqJHnfZUrV5afn5+qVKkiHx+fHO/ZQ9ipU6ecKQkouMxMafp0cySHPaxVrSotWSJ98QVhDQDgMpwKbBEREYqPj9fZs2dzjG/dujX7/Ty/mbe3IiIidOzYMV26dCnHe/bn3gIDA50pCSiYuDjTUmrIEOnCBTP2+OPmCI9HHzWbDAAAcBFOBbYuXbooMzNTc+fOzR5LT0/XggULFBkZqVq1akmSDh48qL179+a4t1u3bsrMzNSiRYuyx9LS0vT++++rUaNGV52dAwrFxYvSsGFmA8H27Wasfn1p3Tpp4UIzwwYAgItx6hm2yMhIRUVFacSIEUpJSVGDBg20aNEiJSYmat68edmf69WrlzZs2CCbzZY99swzz+i9997TgAEDFB8fr9q1a2vJkiU6cOCAVq9efeM/EXA1X34pPfustH+/uS5VSnrxRWnUKLPBAAAAF+V08/fFixdr1KhRWrJkiU6dOqXw8HDFxsaqVatW17zP399f69ev17BhwzR//nxduHBBERERWrNmje69915nywGu7tw56YUXch7VERkpzZ0rhYdbVxcAAPnkZfvj9FcJtXv3boWFhWnXrl0KDQ21uhy4kvXrpSeflA4cMNfly0uvvCL16yddsfEFAIDi4ExucXqGDXBpFy6YA3Dfessx1ratNH++VLeuZWUBAOAMAhvcz8aNUu/e0i+/mOuyZaXXXzezat5O7bMBAMBS/O0F95Gaap5Va9XKEdbuvFP673+lAQMIawCAEosZNriHrVvNOWo//2yu/fykyZNNqymeVQMAlHBMOaBkS0+XRoyQbr/dEdYiI83BuEOGENYAAG6BGTaUXDt2mFm1XbvMdZky0rhx0tCh5ow1AADcBH+roeS5fFmaNMl8ZWSYsaZNpcWLpbAwa2sDAKAIENhQsvz4o5lVi4sz16VKmU4FI0ZIpUtbWxsAAEWEwIaSISNDiomRxowxM2yS1LixtGiRmV0DAMCNEdjg+vbsMeeqbdtmrr29pX/8Qxo9WvL1tbQ0AACKA4ENriszU5o+XRo50uwGlaSGDc2sWosWlpYGAEBxIrDBNSUkmFm1TZvMtZeX9Pzz0oQJkr+/paUBAFDcCGxwLVlZ0uzZpg/oxYtmrH59aeFC07UAAAAPxMG5cB2JidLf/y5FRzvCWnS0aS1FWAMAeDBm2GA9m016913TB/T8eTNWt640
f77Utq2lpQEA4AqYYYO1Dh2S7r9feuYZR1jr21f63/8IawAA/I4ZNljDZjO7PQcPls6cMWM33STNmyfde6+lpQEA4GqYYUPxS06WOnSQnnjCEdZ69zY9QQlrAADkwgwbio/NJi1bJg0cKJ08acaqV5fmzpXat7e2NgAAXBgzbCgex45JUVFSjx6OsNajh5lVI6wBAHBNzLCh6K1aJT37rAltkhQYKL39ttS5s7V1AQBQQjDDhqJz8qTUs6cJZvaw1rmzmVUjrAEAkG/MsKFoxMZKTz8t/fabua5USXrrLemRR0ybKQAAkG/MsKFwnTljdn+2b+8Iaw8+KO3eLXXvTlgDAMAJzLCh8Hz5pdSnjzkMV5IqVJBmzJAef5ygBgDADWCGDTfu3DmzqeDeex1h7Z57zLNqvXsT1gAAuEHMsOHGfP21WQJNTDTX5ctLb7xhnl8jqAEAUCiYYYNzLl6UBg0y/T7tYa1NG+nHH00vUMIaAACFhhk2FNymTWapMyHBXPv7S6+9Jg0YIHnzbwAAAAobf7si/9LSpBdflP76V0dYu/126b//laKjCWsAABQRZtiQP9u2md2ee/eaa19fadIkafBgycfH0tIAAHB3BDZcW3q6NH68WfLMzDRjzZtLixZJf/6ztbUBAOAhCGy4up07pV69zEYCSSpdWho7Vho2TCrF/3QAACgu/K2L3C5fll55RZowQcrIMGNNm5pZtcaNra0NAAAPRGBDTvbDbn/4wVyXKiWNHGm+Spe2tDQAADwVgQ1GVpYUEyONHi1dumTGQkOlxYulZs2srQ0AAA/HOQwwXnxR+sc/TFjz9javf/iBsAYAgAtghg3m2bSpU83r+vWlpUulli2trQkAAGQjsHm6LVtMKylJqlpVWrdOqlvX0pIAAEBOLIl6ssOHpYcfNsugpUpJK1YQ1gAAcEEENk+Vmip17Cj99pu5fvNNqXVrS0sCAAB5I7B5IptNeuopaft2c/3ss+YLAAC4JAKbJ4qJkT74wLxu3VqaMcPaegAAwDUR2DzNmjXmyA5JqlNHWr5cKlPG2poAAMA1Edg8yZ49Uo8eZkm0XDnp88+lwECrqwIAANdBYPMUp05JDz0knT1rrhcvlsLDra0JAADkC4HNE2RkSN26SQkJ5nrsWKlTJ0tLAgAA+Udg8wTDhklr15rXnTtLo0ZZWw8AACgQApu7W7hQmjbNvA4PN9fe/NoBAChJ+JvbnW3eLD3zjHldtar02WdS+fLW1gQAAAqMwOauDh0yz6nRdgoAgBKPwOaOaDsFAIBbIbC5G3vbqR9+MNf9+tF2CgCAEo7A5m5ef522UwAAuBkCmztZs0YaMcK8rlvXtJ0qXdrSkgAAwI1zOrClp6dr+PDhqlGjhvz9/RUZGam19rO+CuDuu++Wl5eXBg4c6GwpkEzbqe7dHW2nPvuMtlMAALgJpwNb7969NXXqVPXs2VMzZsyQj4+P2rVrp40bN+b7z1i1apU2b97sbAmws7edOnfOXNN2CgAAt+JUYNu2bZuWLVumV155RTExMerbt6/Wr1+vOnXqaNiwYfn6M9LS0vTCCy9o+PDhzpQAO9pOAQDg9pwKbCtWrJCPj4/69u2bPebn56c+ffpo8+bNSkpKuu6f8frrrysrK0tDhw51pgTY0XYKAAC3V8qZm+Li4hQSEqIKFSrkGG/RooUkaefOnapVq9ZV7z948KBeffVVzZ8/X/7+/gX63ikpKTp27FiOsQT77JKnoe0UAAAewanAlpycrODg4Fzj9rEjR45c8/4XXnhBTZs21SOPPFLg7z179myNGzeuwPe5HdpOAQDgMZwKbKmpqfL19c017ufnl/3+1fznP//RypUrtXXrVme+tfr376+oqKgcYwkJCerYsaNTf16JdOiQ9PDDjrZTK1fSdgoAADfmVGDz9/dXenp6rvG0tLTs9/OSkZGhQYMG6bHHHlPz5s2d+dYKCgpSUFCQU/e6BXvbqaNHzfWsWVKrVpaW
BAAAipZTgS04OFiHDx/ONZ6cnCxJqlGjRp73LV68WD///LPmzJmjxMTEHO+dO3dOiYmJCgoKUtmyZZ0py/3ZbFKfPjnbTtmXRQEAgNty6gn1iIgIxcfH6+zZsznG7cucERERed538OBBXb58WXfccYfq1auX/SWZMFevXj19+eWXzpTkGV5/XfrwQ/O6TRvaTgEA4CGcmmHr0qWLpkyZorlz52Yfy5Genq4FCxYoMjIye4fowYMHdfHiRTVs2FCS9Mgjj+QZ5h5++GG1a9dOTz/9tCIjI538UdxcbCxtpwAA8FBOBbbIyEhFRUVpxIgRSklJUYMGDbRo0SIlJiZq3rx52Z/r1auXNmzYIJvNJklq2LBhdni7Ur169Txr40BB/PST1KNHzrZTVataXRUAACgmTgU2ySxhjho1SkuWLNGpU6cUHh6u2NhYteIB+MJ18qTUoYOj7dSSJbSdAgDAwzgd2Pz8/BQTE6OYmJirfubrr7/O159ln4HDFa5sOzVunDnOAwAAeBSOxXdlL74orVtnXnfuLL38srX1AAAASxDYXNWCBdL06eZ1kybSokW0nQIAwEORAFzRd99Jzz5rXtvbTpUrZ21NAADAMgQ2V5OUJHXqlLPtVJ06VlcFAAAsRGBzJRcvmk0FtJ0CAAB/QGBzFVe2nerfn7ZTAABAEoHNdbz2mrRsmXndpo1jwwEAAPB4BDZXsHq19NJL5jVtpwAAwBUIbFb76SepZ09H26nPP6ftFAAAyIHAZqWTJ6WHHsrZdqpxY2trAgAALofAZhV726lffjHX48fTdgoAAOSJwGaVoUMdbae6dKHtFAAAuCoCmxXmz5dmzDCvmzSRFi6UvLwsLQkAALguAltx+2PbqcBA2k4BAIDrIrAVJ3vbqcuXaTsFAADyjcBWXC5elDp2dLSdeust6a9/tbQkAABQMhDYioO97dSOHeZ6wACpb19rawIAACUGga04vPqqo+1U27bStGnW1gMAAEoUAltRW71aGjnSvK5XT/r4Y9pOAQCAAiGwFaUr20599hltpwAAQIER2IrKlW2nli6l7RQAAHAKga0oZGRIXbvmbDvVsaOlJQEAgJKLwFYUXnhB+uor8zoqirZTAADghhDYCtu8edLMmeZ1RIS0YAFtpwAAwA0hsBWmTZukfv3M68BA6dNPaTsFAABuGIGtsBw86Gg7Vbo0bacAAEChIbAVBnvbqZQUc03bKQAAUIgIbDfKZpOefFKKizPXAwZITz9tbU0AAMCtENhu1CuvSB99ZF7TdgoAABQBAtuN+Pxzx5Ed9epJy5fTdgoAABQ6Apuzdu92tJ0qX96EtypVrK4KAAC4IQKbM06cMG2nzp8310uXSmFh1tYEAADcFoGtoDIypG7dpF9/NdcTJkgdOlhbEwAAcGsEtoK6su3UyJHW1gMAANwega0gaDsFAAAsQGDLryvbTn32GW2nAABAsSCw5cfhwznbTq1aJdWubXVVAADAQxDY8qNqVal9e/P6rbekO++0th4AAOBRSlldQIng6yu9+6706KNSmzZWVwMAADwMM2z55eVFWAMAAJYgsAEAALg4AhsAAICLI7ABAAC4OAIbAACAiyOwAQAAuDgCGwAAgIsjsAEAALg4AhsAAICLc4tOB+np6ZKkhIQEiysBAAC4NnteseeX/HCLwJaUlCRJ6tixo7WFAAAA5FNSUpKaNWuWr8962Ww2WxHXU+ROnz6tDRs2qFatWvL19bW6nBIrISFBHTt21KeffqoGDRpYXQ4KiN9fycfvsOTjd1jyFcfvMD09XUlJSWrdurUqVqyYr3vcYoatYsWK6tChg9VluI0GDRooNDTU6jLgJH5/JR+/w5KP32HJV9S/w/zOrNmx6QAAAMDFEdgAAABcHIENAADAxRHYkC0wMFBjxoxRYGCg1aXACfz+Sj5+hyUfv8OSz1V/h26xSxQAAMCdMcMGAADg4ghsAAAALo7ABgAA4OIIbAAAAC6OwAYAAODi
CGwAAAAujsDm4b7//nsNHDhQoaGhKleunGrXrq2uXbsqPj7e6tLgpEmTJsnLy0thYWFWl4IC2LFjhx566CFVrlxZZcuWVVhYmGbOnGl1Wcinffv26ZFHHlHNmjVVtmxZNWzYUOPHj9fFixetLg1XOH/+vMaMGaP77rtPlStXlpeXlxYuXJjnZ/fs2aP77rtP5cuXV+XKlfXYY4/p2LFjxVvw79yi+Tuc99prr2nTpk2KiopSeHi4fvvtN82aNUvNmjXTli1b+Eu/hDl06JAmT56scuXKWV0KCuDLL79U+/bt1bRpU40aNUrly5fXL7/8okOHDlldGvIhKSlJLVq0UEBAgAYOHKjKlStr8+bNGjNmjH744Qd99tlnVpeIPzh+/LjGjx+v2rVrq0mTJvr666/z/NyhQ4fUqlUrBQQEaPLkyTp//rymTJmiH3/8Udu2bVOZMmWKt3AbPNqmTZts6enpOcbi4+Ntvr6+tp49e1pUFZzVrVs321133WVr3bq1LTQ01OpykA9nzpyxVatWzfbwww/bMjMzrS4HTpg0aZJNkm3Xrl05xnv16mWTZDt58qRFlSEvaWlptuTkZJvNZrN9//33Nkm2BQsW5Ppcv379bP7+/rYDBw5kj61du9YmyTZnzpziKjcbS6Ie7vbbb8/1r4Sbb75ZoaGh2rNnj0VVwRnffPONVqxYoenTp1tdCgrggw8+0NGjRzVp0iR5e3vrwoULysrKsrosFMDZs2clSdWqVcsxHhwcLG9v7+KficE1+fr6qnr16tf93MqVK/Xggw+qdu3a2WN///vfFRISoo8//rgoS8wTgQ252Gw2HT16VFWrVrW6FORTZmamoqOj9dRTT6lx48ZWl4MCWLdunSpUqKDDhw/rlltuUfny5VWhQgX169dPaWlpVpeHfGjTpo0kqU+fPtq5c6eSkpL00Ucf6e2339agQYN4RKEEOnz4sFJSUvSXv/wl13stWrRQXFxcsddEYEMu77//vg4fPqxu3bpZXQry6Z133tGBAwc0YcIEq0tBAe3bt08ZGRnq0KGD7r33Xq1cuVJPPvmk3nnnHT3xxBNWl4d8uO+++zRhwgStXbtWTZs2Ve3atfXII48oOjpa06ZNs7o8OCE5OVmSmSW9UnBwsE6ePKn09PRirYlNB8hh7969GjBggG677TY9/vjjVpeDfDhx4oRGjx6tUaNGKTAw0OpyUEDnz5/XxYsX9eyzz2bvCu3UqZMuXbqkOXPmaPz48br55pstrhLXU7duXbVq1UqdO3dWlSpVtGbNGk2ePFnVq1fXwIEDrS4PBZSamirJLJ9eyc/PL/szeb1fVAhsyPbbb7/pgQceUEBAgFasWCEfHx+rS0I+vPzyy6pcubKio6OtLgVO8Pf3lyR17949x3iPHj00Z84cbd68mcDm4pYtW6a+ffsqPj5eNWvWlGRCd1ZWloYPH67u3burSpUqFleJgrD//zKvWTT7owr2zxQXlkQhSTpz5ozuv/9+nT59Wv/+979Vo0YNq0tCPuzbt09z587VoEGDdOTIESUmJioxMVFpaWm6fPmyEhMTdfLkSavLxDXY/7925QPrQUFBkqRTp04Ve00omNmzZ6tp06bZYc3uoYce0sWLFy153gk3xr4Ual8a/aPk5GRVrly5WGfXJAIbZP610L59e8XHxys2NlaNGjWyuiTk0+HDh5WVlaVBgwapXr162V9bt25VfHy86tWrp/Hjx1tdJq7h1ltvlWR+l3905MgRSWKZuwQ4evSoMjMzc41fvnxZkpSRkVHcJeEG3XTTTQoMDNT27dtzvbdt2zZFREQUe00ENg+XmZmpbt26afPmzVq+fLluu+02q0tCAYSFhemTTz7J9RUaGqratWvrk08+UZ8+fawuE9fQtWtXSdK8efNyjL/33nsqVapU9g5EuK6QkBDFxcXl6hDz4YcfytvbW+Hh4RZVhhvRuXNnxcbGKikpKXvsq6++Unx8vKKiooq9Hi+bzWYr
9u8KlzF48GDNmDFD7du3z/6L448effRRC6rCjWrTpo2OHz+uXbt2WV0K8qFPnz6aP3++unbtqtatW+vrr7/W8uXLNWLECE2ePNnq8nAd33zzje666y5VqVJFAwcOVJUqVRQbG6t//etfeuqpp/Tuu+9aXSKuMGvWLJ0+fVpHjhzR22+/rU6dOqlp06aSpOjoaAUEBCgpKUlNmzZVxYoV9dxzz+n8+fOKiYlRzZo19f333xf7kiiBzcO1adNGGzZsuOr7/M+jZCKwlSyXL1/W5MmTtWDBAh05ckR16tTRgAEDNHjwYKtLQz5t27ZNY8eOVVxcnE6cOKF69erp8ccf17Bhw1SqFPv7XE3dunV14MCBPN/bv3+/6tatK0navXu3nn/+eW3cuFFlypTRAw88oDfeeCPXM6fFgcAGAADg4niGDQAAwMUR2AAAAFwcgQ0AAMDFEdgAAABcHIENAADAxRHYAAAAXByBDQAAwMUR2AAAAFwcgQ0AAMDFEdgAAABcHIENAADAxRHYAAAAXByBDQAAwMUR2AAAAFzc/wPT3IjUFhwI4wAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 720x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pprint\n",
    "\n",
    "# np, plt and PCA are imported in the notebook's first code cell.\n",
    "\n",
    "# Fit PCA on the standardised features and accumulate the per-component\n",
    "# explained-variance ratios: entry k is the share of total variance kept\n",
    "# by the first k+1 components.\n",
    "pca = PCA(n_components=10)\n",
    "pca.fit(X_data)\n",
    "y = pca.explained_variance_ratio_\n",
    "\n",
    "result = np.cumsum(y)\n",
    "pprint.pp(result)\n",
    "\n",
    "# Derive the x-axis from the number of fitted components instead of repeating\n",
    "# the literal 10, so changing n_components cannot cause a length mismatch.\n",
    "component_counts = range(1, len(result) + 1)\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(6, 3), dpi=120)\n",
    "ax.plot(component_counts, result, c='r')\n",
    "ax.set_xlabel('Number of principal components')\n",
    "ax.set_ylabel('Cumulative explained variance ratio')\n",
    "ax.set_title('PCA cumulative explained variance')\n",
    "# plt.show() renders the figure without echoing the Line2D repr as cell output.\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the standardised feature matrix (X) and the failure labels (Y)\n",
    "# together in a single .npz archive.\n",
    "#\n",
    "# Disabled alternative: store a 5-component PCA projection instead.\n",
    "#   pca = PCA(n_components=5)\n",
    "#   X_pca_data = pca.fit_transform(X_data)\n",
    "#   np.savez(\"pca_data.npz\", X=X_pca_data, Y=Y_data)\n",
    "\n",
    "np.savez(\"data.npz\", X=X_data, Y=Y_data)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py310",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
